/*
Copyright (c) 2013. The YARA Authors. All Rights Reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Lexical analyzer for hex strings */

%{

/* Disable warnings for unused functions in this file.

As we redefine YY_FATAL_ERROR macro to use our own function hex_yyfatal, the
yy_fatal_error function generated by Flex is not actually used, causing a
compiler warning. Flex doesn't offer any options to remove the yy_fatal_error
function. When they include something like %option noyy_fatal_error as they do
with noyywrap then we can remove this pragma.
*/

#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wunused-function"
#endif

#include <errno.h>
#include <limits.h>
#include <setjmp.h>
#include <stdlib.h>

#include <yara/globals.h>
#include <yara/limits.h>
#include <yara/error.h>
#include <yara/mem.h>
#include <yara/re.h>
#include <yara/threading.h>
#include <yara/strutils.h>
#include <yara/hex_lexer.h>

#include "hex_grammar.h"

#ifdef _WIN32
#define snprintf _snprintf
#endif

// When condition x holds, stores `error` in the error_code field of the RE_AST
// attached to the scanner (obtained with yyget_extra) and invokes YYABORT.
//
// The body is wrapped in do { } while (0) so that an invocation followed by a
// semicolon behaves as a single statement, even inside an unbraced if/else.
// The macro expects a variable named yyscanner to be in scope.
//
// NOTE(review): YYABORT is not defined in this file (presumably a Bison parser
// macro) and no rule in this file uses ERROR_IF -- confirm it is still needed.
#define ERROR_IF(x, error) \
    do \
    { \
      if (x) \
      { \
        RE_AST* re_ast = yyget_extra(yyscanner); \
        re_ast->error_code = error; \
        YYABORT; \
      } \
    } while (0)

%}

/* Generate a reentrant scanner whose yylex() also receives the semantic
   value (yylval) of the Bison parser as an argument. */
%option reentrant bison-bridge
/* Do not call yywrap() at end of input; the scanner just stops. */
%option noyywrap
/* Do not include <unistd.h> in the generated scanner. */
%option nounistd
/* Do not generate the input() and yyunput() helpers; no rule in this file
   calls them. */
%option noinput
%option nounput
/* Never treat the input as interactive. */
%option never-interactive
/* Keep track of the current line number. */
%option yylineno
/* Generated yy* symbols are named with the hex_yy prefix instead. */
%option prefix="hex_yy"

/* Name of the generated C source file. */
%option outfile="lex.yy.c"

/* Have flex print scanner statistics and warnings while generating. */
%option verbose
%option warn

/* Named character classes used by the rules below. The letter class is not
   referenced by any rule in this file. */
digit         [0-9]
letter        [a-zA-Z]
hexdigit      [a-fA-F0-9]

/* Exclusive start conditions: while one of them is active, only the rules
   explicitly prefixed with it can match. The comment condition is entered
   on a block comment opener and the range condition on an opening square
   bracket. */
%x comment
%x range

%%


<INITIAL>{hexdigit}{2}  {

  // Two consecutive hex digits form a fully specified byte. The matched text
  // is converted with xtoi (declared outside this file, assumed to parse
  // hexadecimal text) and passed to the parser as a _BYTE_ token.
  yylval->integer = xtoi(yytext);
  return _BYTE_;
}

<INITIAL>{hexdigit}\?  {

  // A hex digit followed by a question mark: the second nibble is a wildcard.
  // The question mark is overwritten in place with a zero digit so that the
  // two characters can be converted as an ordinary hex byte, then 0xF000 is
  // OR-ed into the token value.
  // NOTE(review): 0xF000 looks like a nibble mask (0xF0) stored above the
  // byte value for the parser -- confirm against the grammar.
  yytext[1] = '0'; // replace ? by 0
  yylval->integer = xtoi(yytext) | 0xF000 ;
  return _MASKED_BYTE_;
}

\?{hexdigit}  {

  // A question mark followed by a hex digit: the first nibble is a wildcard.
  // The question mark is overwritten in place with a zero digit so that the
  // two characters can be converted as an ordinary hex byte, then 0x0F00 is
  // OR-ed into the token value.
  // NOTE(review): 0x0F00 looks like a nibble mask (0x0F) stored above the
  // byte value for the parser -- confirm against the grammar.
  yytext[0] = '0'; // replace ? by 0
  yylval->integer = xtoi(yytext) | 0x0F00 ;
  return _MASKED_BYTE_;
}

\?\? {

  // Two question marks: a byte where both nibbles are wildcards. The token
  // value is zero, i.e. none of the bits set by the two masked-byte rules
  // above are present.
  yylval->integer = 0x0000;
  return _MASKED_BYTE_;
}

{hexdigit} {

  // A single hex digit is matched here only when none of the two-character
  // rules above applies, because flex prefers the longest match. That means
  // the digit has no partner, so the error is recorded through yyerror and
  // scanning stops.
  yyerror(yyscanner, lex_env, "uneven number of digits in hex string");
  yyterminate();
}

\[ {
  // An opening square bracket starts a jump: switch to the range state and
  // hand the bracket itself to the parser as a token.
  BEGIN(range);
  return '[';
}

"/*" {
  // Start of a block comment: switch to the comment state.
  BEGIN(comment);
}

<comment>"*/" {
  // End of a block comment: go back to normal scanning.
  BEGIN(INITIAL);
}

<comment>.|\n      // discard everything inside a block comment

"//".*             // discard a single-line comment up to the end of the line

<range>\- {
  // Inside a jump the dash is passed to the parser as a token of its own.
  return '-';
}

<range>{digit}+ {

  // Decimal number inside a jump. The conversion uses strtol rather than
  // atoi because atoi has undefined behavior when the value does not fit in
  // an int, and the digits come straight from the text being compiled.
  // The pattern only matches digits, so the value is never negative.
  long value;

  errno = 0;
  value = strtol(yytext, NULL, 10);

  // Values that overflow long or exceed the int range formerly produced by
  // atoi are rejected the same way the other lexical errors are.
  if (errno == ERANGE || value > INT_MAX)
  {
    yyerror(yyscanner, lex_env, "jump length out of range in hex string");
    yyterminate();
  }

  yylval->integer = (int) value;
  return _NUMBER_;
}

<range>\] {
  // The closing square bracket ends the jump: return to normal scanning and
  // hand the bracket itself to the parser as a token.
  BEGIN(INITIAL);
  return ']';
}

<range>[ \t\r\n]   // ignore whitespace inside a jump

<range>. {

  // Catch-all for the range state: any character not handled by the range
  // rules above is an error. The error is recorded through yyerror and
  // scanning stops.
  yyerror(yyscanner, lex_env, "invalid character in hex string jump");
  yyterminate();
}

[ \t\r\n]         // ignore whitespace between tokens

[{}()|]  {        // pass valid characters to the parser

  // Each of these punctuation characters is a token by itself; the token
  // code is the character that was matched.
  return yytext[0];
}

. {               // reject all other characters

  // Catch-all for normal scanning: it comes last, so it only matches a
  // character that no earlier rule accepted. The error is recorded through
  // yyerror and scanning stops.
  yyerror(yyscanner, lex_env, "invalid character in hex string");
  yyterminate();
}

%%

//
// Fatal error handler for the generated scanner. Because of the
// '%option prefix="hex_yy"' directive its real name is hex_yyfatal. A fatal
// error is raised from deep inside the code generated by flex/bison, and the
// only graceful way out of there is a setjmp/longjmp pair: the jump buffer is
// read from thread-local storage and control is transferred back to the
// setjmp call that armed it. Both arguments are ignored.
//
void yyfatal(
    yyscan_t yyscanner,
    const char *error_message)
{
  jmp_buf* jump_target;

  jump_target = (jmp_buf*) yr_thread_storage_get_value(
      &yr_yyfatal_trampoline_tls);

  // longjmp does not return to this function.
  longjmp(*jump_target, 1);
}

//
// Records a hex string error in the lexer environment. Only the first error
// is kept: once lex_env->last_error holds an error code, later calls are
// ignored, so follow-up errors such as "syntax error, unexpected $end" caused
// by early parser termination do not hide the original one.
//
void yyerror(
    yyscan_t yyscanner,
    HEX_LEX_ENVIRONMENT* lex_env,
    const char *error_message)
{
  // An earlier error is already stored; leave it untouched.
  if (lex_env->last_error != ERROR_SUCCESS)
    return;

  strlcpy(
      lex_env->last_error_message,
      error_message,
      sizeof(lex_env->last_error_message));

  lex_env->last_error = ERROR_INVALID_HEX_STRING;
}


//
// Parses a hex string and builds the corresponding RE_AST.
//
// hex_string: null-terminated text handed to the scanner.
// re_ast:     receives the AST created with yr_re_ast_create.
// error:      its message field receives the error text when scanning or
//             parsing reports an error through yyerror.
//
// Returns ERROR_SUCCESS on success, ERROR_INTERNAL_FATAL_ERROR when yyfatal
// jumped back here, ERROR_INSUFFICIENT_MEMORY when the scanner could not be
// initialized, or the error code stored in the lexer environment. A failure
// of yr_re_ast_create is handled by FAIL_ON_ERROR.
//
// NOTE(review): when an error is reported through yyerror the AST is left in
// *re_ast, so callers are presumably expected to destroy it -- confirm.
// NOTE(review): on the longjmp path neither the scanner nor the AST is
// released here.
//
int yr_parse_hex_string(
    const char* hex_string,
    RE_AST** re_ast,
    RE_ERROR* error)
{
  yyscan_t yyscanner;
  jmp_buf recovery_trampoline;
  HEX_LEX_ENVIRONMENT lex_env;

  lex_env.last_error = ERROR_SUCCESS;
  lex_env.inside_or = 0;

  // Publish the jump buffer so that yyfatal can find it in thread-local
  // storage.
  yr_thread_storage_set_value(
     &yr_yyfatal_trampoline_tls,
     &recovery_trampoline);

  // setjmp returns a non-zero value only when control comes back here
  // through the longjmp in yyfatal.
  if (setjmp(recovery_trampoline) != 0)
    return ERROR_INTERNAL_FATAL_ERROR;

  FAIL_ON_ERROR(yr_re_ast_create(re_ast));

  // The RE_FLAGS_FAST_REGEXP flag indicates a regular expression can be
  // matched by a faster algorithm. Only hex strings without alternatives
  // qualify. A string that does contain alternatives, like:
  //
  // { ( 01 02 | 03 04) 05 06 }
  //
  // does not, so the flag is unset later during parsing if alternatives are
  // used.

  (*re_ast)->flags |= RE_FLAGS_FAST_REGEXP;

  // Set RE_FLAGS_DOT_ALL because in hex strings the "dot" (?? in this case)
  // must match all characters including new-line.

  (*re_ast)->flags |= RE_FLAGS_DOT_ALL;

  // yylex_init allocates the scanner state and returns non-zero on failure.
  // In that case yyscanner is not usable, so release the AST and bail out
  // instead of passing an uninitialized scanner to the calls below.
  if (yylex_init(&yyscanner) != 0)
  {
    yr_re_ast_destroy(*re_ast);
    *re_ast = NULL;
    return ERROR_INSUFFICIENT_MEMORY;
  }

  yyset_extra(*re_ast, yyscanner);
  yy_scan_string(hex_string, yyscanner);
  yyparse(yyscanner, &lex_env);
  yylex_destroy(yyscanner);

  if (lex_env.last_error != ERROR_SUCCESS)
  {
    strlcpy(error->message, lex_env.last_error_message, sizeof(error->message));
    return lex_env.last_error;
  }

  return ERROR_SUCCESS;
}
